import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
!pip install pmdarima
Collecting pmdarima Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB) Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.4.2) Requirement already satisfied: Cython!=0.29.18,!=0.29.31,>=0.29 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (3.0.11) Requirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.26.4) Requirement already satisfied: pandas>=0.19 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (2.2.2) Requirement already satisfied: scikit-learn>=0.22 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.5.2) Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.13.1) Requirement already satisfied: statsmodels>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (0.14.4) Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (2.2.3) Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (75.1.0) Requirement already satisfied: packaging>=17.1 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (24.2) Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2024.2) Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2024.2) Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.22->pmdarima) (3.5.0) Requirement already satisfied: patsy>=0.5.6 in /usr/local/lib/python3.10/dist-packages (from statsmodels>=0.13.2->pmdarima) (1.0.1) Requirement already satisfied: 
six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas>=0.19->pmdarima) (1.16.0) Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.1/2.1 MB 43.5 MB/s eta 0:00:00 Installing collected packages: pmdarima Successfully installed pmdarima-2.0.4
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
!pip install tensorflow
Requirement already satisfied: tensorflow in /usr/local/lib/python3.10/dist-packages (2.17.1) Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.4.0) Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.6.3) Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.3.25) Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.6.0) Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.2.0) Requirement already satisfied: h5py>=3.10.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.12.1) Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (18.1.1) Requirement already satisfied: ml-dtypes<0.5.0,>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.4.1) Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.4.0) Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from tensorflow) (24.2) Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (4.25.5) Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.32.3) Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow) (75.1.0) Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0) Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.5.0) Requirement already satisfied: typing-extensions>=3.6.6 in 
/usr/local/lib/python3.10/dist-packages (from tensorflow) (4.12.2) Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.16.0) Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.68.0) Requirement already satisfied: tensorboard<2.18,>=2.17 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (2.17.1) Requirement already satisfied: keras>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (3.5.0) Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (0.37.1) Requirement already satisfied: numpy<2.0.0,>=1.23.5 in /usr/local/lib/python3.10/dist-packages (from tensorflow) (1.26.4) Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow) (0.45.0) Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (13.9.4) Requirement already satisfied: namex in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (0.0.8) Requirement already satisfied: optree in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow) (0.13.1) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (3.4.0) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.21.0->tensorflow) (2024.8.30) Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) 
(3.7) Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) (0.7.2) Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow) (3.1.3) Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.18,>=2.17->tensorflow) (3.0.2) Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0->tensorflow) (3.0.0) Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->keras>=3.2.0->tensorflow) (2.18.0) Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.2.0->tensorflow) (0.1.2)
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
# Silence every warning for the rest of the session. Note that this also hides
# deprecation warnings raised by any of the imported libraries.
warnings.filterwarnings('ignore')
import kagglehub
# Download the Amazon reviews dataset through kagglehub and load the
# cell-phone/accessories file, read as JSON Lines (one record per line).
dataset = kagglehub.dataset_download("abdallahwagih/amazon-reviews")
file_path = os.path.join(dataset, 'Cell_Phones_and_Accessories_5.json')
df = pd.read_json(file_path, lines=True)

num_reviews = len(df)
print("Number of Reviews:", num_reviews)
print("Initial DataFrame Preview:")
print(df.head())
print("\nInitial DataFrame Info:")
# DataFrame.info() writes its report to stdout and returns None, so wrapping
# it in print() appended a stray "None" line to the output.
df.info()
Number of Reviews: 194439 Initial DataFrame Preview: reviewerID asin reviewerName helpful \ 0 A30TL5EWN6DFXT 120401325X christina [0, 0] 1 ASY55RVNIL0UD 120401325X emily l. [0, 0] 2 A2TMXE2AFO7ONB 120401325X Erica [0, 0] 3 AWJ0WZQYMYFQ4 120401325X JM [4, 4] 4 ATX7CZYFXI1KW 120401325X patrice m rogoza [2, 3] reviewText overall \ 0 They look good and stick good! I just don't li... 4 1 These stickers work like the review says they ... 5 2 These are awesome and make my phone look so st... 5 3 Item arrived in great time and was in perfect ... 4 4 awesome! stays on, and looks great. can be use... 5 summary unixReviewTime reviewTime 0 Looks Good 1400630400 05 21, 2014 1 Really great product. 1389657600 01 14, 2014 2 LOVE LOVE LOVE 1403740800 06 26, 2014 3 Cute! 1382313600 10 21, 2013 4 leopard home button sticker for iphone 4s 1359849600 02 3, 2013 Initial DataFrame Info: <class 'pandas.core.frame.DataFrame'> RangeIndex: 194439 entries, 0 to 194438 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 reviewerID 194439 non-null object 1 asin 194439 non-null object 2 reviewerName 190920 non-null object 3 helpful 194439 non-null object 4 reviewText 194439 non-null object 5 overall 194439 non-null int64 6 summary 194439 non-null object 7 unixReviewTime 194439 non-null int64 8 reviewTime 194439 non-null object dtypes: int64(2), object(7) memory usage: 13.4+ MB None
# Index the reviews by timestamp (unixReviewTime is parsed as epoch seconds).
df['date'] = pd.to_datetime(df['unixReviewTime'], unit='s')
df = df.set_index('date')

# Mean star rating per month; months without any review come out as NaN.
df_monthly = df[['overall']].resample('M').mean()

missing_months = df_monthly['overall'].isna()
print("Number of NaN values in df_monthly['overall']:", missing_months.sum())
print("Dates with NaN values:")
print(df_monthly[missing_months])
Number of NaN values in df_monthly['overall']: 32 Dates with NaN values: overall date 2001-03-31 NaN 2001-04-30 NaN 2001-05-31 NaN 2001-06-30 NaN 2001-07-31 NaN 2001-08-31 NaN 2001-09-30 NaN 2001-10-31 NaN 2001-11-30 NaN 2001-12-31 NaN 2002-01-31 NaN 2002-02-28 NaN 2002-03-31 NaN 2002-04-30 NaN 2002-05-31 NaN 2002-06-30 NaN 2002-07-31 NaN 2002-08-31 NaN 2002-09-30 NaN 2002-11-30 NaN 2002-12-31 NaN 2003-01-31 NaN 2003-02-28 NaN 2003-03-31 NaN 2003-04-30 NaN 2003-05-31 NaN 2003-06-30 NaN 2003-07-31 NaN 2003-08-31 NaN 2003-09-30 NaN 2003-10-31 NaN 2003-11-30 NaN
# Carry the last observed monthly mean forward into months without reviews.
# The result is assigned back instead of calling
# fillna(method='ffill', inplace=True) on the column: the `method=` argument
# is deprecated in pandas 2.x, and an in-place call on a selected column may
# act on a temporary copy rather than on df_monthly itself.
df_monthly['overall'] = df_monthly['overall'].ffill()
# Line chart of the monthly mean rating over the whole history.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_monthly.index, df_monthly['overall'], label='Monthly Average Rating')
ax.set_title('Monthly Average Rating Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.legend()
plt.show()
# Character count of each review, plotted against its star rating.
df['review_length'] = df['reviewText'].str.len()

fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='review_length', y='overall', alpha=0.5, ax=ax)
ax.set_title("Review Length vs. Overall Rating")
ax.set_xlabel("Review Length")
ax.set_ylabel("Overall Rating")
plt.show()
!pip install vaderSentiment
Collecting vaderSentiment Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes) Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from vaderSentiment) (2.32.3) Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (3.4.0) Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (3.10) Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (2.2.3) Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->vaderSentiment) (2024.8.30) Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 126.0/126.0 kB 9.3 MB/s eta 0:00:00 Installing collected packages: vaderSentiment Successfully installed vaderSentiment-3.3.2
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Score every review with VADER and keep its 'compound' value.
analyzer = SentimentIntensityAnalyzer()
df['sentiment_score'] = [
    analyzer.polarity_scores(text)['compound'] for text in df['reviewText']
]

# Mean sentiment per month, drawn as a marked line.
monthly_sentiment = df['sentiment_score'].resample('M').mean()
fig, ax = plt.subplots(figsize=(12, 6))
monthly_sentiment.plot(kind='line', color='purple', marker='o', ax=ax)
ax.set_title("Monthly Average Sentiment Score")
ax.set_xlabel("Month")
ax.set_ylabel("Sentiment Score")
ax.grid(True)
plt.show()
# Chronological split: the first 80% of months train, the rest test.
split_point = int(0.8 * len(df_monthly))
train, test = df_monthly.iloc[:split_point], df_monthly.iloc[split_point:]

# Stepwise seasonal search with a 12-period season and one seasonal
# difference (D=1); the non-seasonal d is left as None.
arima_search_options = dict(
    start_p=1,
    start_q=1,
    max_p=3,
    max_q=3,
    m=12,
    start_P=0,
    seasonal=True,
    d=None,
    D=1,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
stepwise_model = auto_arima(train['overall'], **arima_search_options)
print(stepwise_model.summary())
Performing stepwise search to minimize aic ARIMA(1,0,1)(0,1,1)[12] intercept : AIC=inf, Time=3.91 sec ARIMA(0,0,0)(0,1,0)[12] intercept : AIC=239.413, Time=0.09 sec ARIMA(1,0,0)(1,1,0)[12] intercept : AIC=184.278, Time=0.23 sec ARIMA(0,0,1)(0,1,1)[12] intercept : AIC=inf, Time=0.68 sec ARIMA(0,0,0)(0,1,0)[12] : AIC=240.069, Time=0.03 sec ARIMA(1,0,0)(0,1,0)[12] intercept : AIC=196.773, Time=0.08 sec ARIMA(1,0,0)(2,1,0)[12] intercept : AIC=181.169, Time=0.58 sec ARIMA(1,0,0)(2,1,1)[12] intercept : AIC=inf, Time=3.58 sec ARIMA(1,0,0)(1,1,1)[12] intercept : AIC=inf, Time=1.43 sec ARIMA(0,0,0)(2,1,0)[12] intercept : AIC=234.171, Time=0.39 sec ARIMA(2,0,0)(2,1,0)[12] intercept : AIC=174.669, Time=0.87 sec ARIMA(2,0,0)(1,1,0)[12] intercept : AIC=178.829, Time=0.34 sec ARIMA(2,0,0)(2,1,1)[12] intercept : AIC=inf, Time=5.27 sec ARIMA(2,0,0)(1,1,1)[12] intercept : AIC=inf, Time=1.60 sec ARIMA(3,0,0)(2,1,0)[12] intercept : AIC=170.995, Time=1.01 sec ARIMA(3,0,0)(1,1,0)[12] intercept : AIC=175.289, Time=0.46 sec ARIMA(3,0,0)(2,1,1)[12] intercept : AIC=inf, Time=3.56 sec ARIMA(3,0,0)(1,1,1)[12] intercept : AIC=inf, Time=1.84 sec ARIMA(3,0,1)(2,1,0)[12] intercept : AIC=171.869, Time=1.69 sec ARIMA(2,0,1)(2,1,0)[12] intercept : AIC=171.618, Time=3.23 sec ARIMA(3,0,0)(2,1,0)[12] : AIC=169.753, Time=1.88 sec ARIMA(3,0,0)(1,1,0)[12] : AIC=173.773, Time=0.26 sec ARIMA(3,0,0)(2,1,1)[12] : AIC=inf, Time=3.62 sec ARIMA(3,0,0)(1,1,1)[12] : AIC=inf, Time=1.73 sec ARIMA(2,0,0)(2,1,0)[12] : AIC=173.785, Time=0.37 sec ARIMA(3,0,1)(2,1,0)[12] : AIC=170.512, Time=0.89 sec ARIMA(2,0,1)(2,1,0)[12] : AIC=170.284, Time=0.77 sec Best model: ARIMA(3,0,0)(2,1,0)[12] Total fit time: 40.462 seconds SARIMAX Results ========================================================================================== Dep. Variable: y No. 
Observations: 129 Model: SARIMAX(3, 0, 0)x(2, 1, 0, 12) Log Likelihood -78.877 Date: Wed, 04 Dec 2024 AIC 169.753 Time: 20:04:50 BIC 186.326 Sample: 02-28-2001 HQIC 176.482 - 10-31-2011 Covariance Type: opg ============================================================================== coef std err z P>|z| [0.025 0.975] ------------------------------------------------------------------------------ ar.L1 0.4106 0.049 8.444 0.000 0.315 0.506 ar.L2 0.1659 0.071 2.333 0.020 0.027 0.305 ar.L3 0.2264 0.073 3.088 0.002 0.083 0.370 ar.S.L12 -0.4135 0.084 -4.899 0.000 -0.579 -0.248 ar.S.L24 -0.2149 0.083 -2.603 0.009 -0.377 -0.053 sigma2 0.2193 0.020 10.866 0.000 0.180 0.259 =================================================================================== Ljung-Box (L1) (Q): 0.14 Jarque-Bera (JB): 112.91 Prob(Q): 0.71 Prob(JB): 0.00 Heteroskedasticity (H): 0.09 Skew: -1.10 Prob(H) (two-sided): 0.00 Kurtosis: 7.28 =================================================================================== Warnings: [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Fit the selected model on the training series.
# NOTE(review): auto_arima above was already given this same series, so this
# looks like a refit of an already-fitted model — confirm whether it is needed.
stepwise_model.fit(train['overall'])
ARIMA(3,0,0)(2,1,0)[12]In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
ARIMA(3,0,0)(2,1,0)[12]
# Forecast one value per held-out month.
n_periods = len(test)
arima_predictions = stepwise_model.predict(n_periods=n_periods)

# Align the forecast with the test months by position. Building the frame
# directly from the returned object reindexes it by label when it is a Series,
# which silently yields NaN rows whenever its index differs from test.index.
forecast_arima = pd.DataFrame(
    {'Prediction_ARIMA': np.asarray(arima_predictions)},
    index=test.index,
)

mae_arima = mean_absolute_error(test['overall'], forecast_arima['Prediction_ARIMA'])
rmse_arima = np.sqrt(mean_squared_error(test['overall'], forecast_arima['Prediction_ARIMA']))
print(f'ARIMA Model MAE: {mae_arima}')
print(f'ARIMA Model RMSE: {rmse_arima}')
ARIMA Model MAE: 0.1191128248176114 ARIMA Model RMSE: 0.14093658612023188
# Overlay the ARIMA forecast on the train and test series.
fig, ax = plt.subplots(figsize=(12, 6))
arima_curves = (
    (train['overall'], 'Train'),
    (test['overall'], 'Test'),
    (forecast_arima['Prediction_ARIMA'], 'ARIMA Prediction'),
)
for curve, curve_label in arima_curves:
    ax.plot(curve, label=curve_label)
ax.set_title('ARIMA Model Predictions')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.legend()
plt.show()
# Reshape the monthly series into the two columns passed to Prophet:
# 'ds' (the date) and 'y' (the value).
prophet_columns = {'date': 'ds', 'overall': 'y'}
monthly_frame = df_monthly.reset_index()
df_prophet = monthly_frame.rename(columns=prophet_columns)

# Same positional split as the other models.
train_prophet, test_prophet = df_prophet.iloc[:split_point], df_prophet.iloc[split_point:]

model_prophet = Prophet()
model_prophet.fit(train_prophet)
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. DEBUG:cmdstanpy:input tempfile: /tmp/tmpcbgt4cmv/xbzlh057.json DEBUG:cmdstanpy:input tempfile: /tmp/tmpcbgt4cmv/58mksex4.json DEBUG:cmdstanpy:idx 0 DEBUG:cmdstanpy:running CmdStan, num_threads: None DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=39527', 'data', 'file=/tmp/tmpcbgt4cmv/xbzlh057.json', 'init=/tmp/tmpcbgt4cmv/58mksex4.json', 'output', 'file=/tmp/tmpcbgt4cmv/prophet_model28zmmit_/prophet_model-20241204200624.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000'] 20:06:24 - cmdstanpy - INFO - Chain [1] start processing INFO:cmdstanpy:Chain [1] start processing 20:06:24 - cmdstanpy - INFO - Chain [1] done processing INFO:cmdstanpy:Chain [1] done processing
<prophet.forecaster.Prophet at 0x7ed681cf7e20>
# Extend the fitted timeline by n_periods months, predict over it, and keep
# only the 'yhat' column for the test dates.
future = model_prophet.make_future_dataframe(periods=n_periods, freq='M')
forecast_prophet = (
    model_prophet.predict(future)
    .set_index('ds')[['yhat']]
    .loc[test.index]
)

prophet_actual = test['overall']
prophet_predicted = forecast_prophet['yhat']
mae_prophet = mean_absolute_error(prophet_actual, prophet_predicted)
rmse_prophet = np.sqrt(mean_squared_error(prophet_actual, prophet_predicted))
print(f'Prophet Model MAE: {mae_prophet}')
print(f'Prophet Model RMSE: {rmse_prophet}')
Prophet Model MAE: 0.138754916973121 Prophet Model RMSE: 0.17126497672464955
# Overlay the Prophet forecast on the train and test series.
fig, ax = plt.subplots(figsize=(12, 6))
prophet_curves = (
    (train['overall'], 'Train'),
    (test['overall'], 'Test'),
    (forecast_prophet['yhat'], 'Prophet Prediction'),
)
for curve, curve_label in prophet_curves:
    ax.plot(curve, label=curve_label)
ax.set_title('Prophet Model Predictions')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.legend()
plt.show()
# Scale the ratings for the LSTM. The scaler is fitted on the training months
# only (rows before split_point) so the test period's minimum and maximum do
# not leak into preprocessing; the whole series is then transformed with it.
# Test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df_monthly.iloc[:split_point])
scaled_data = scaler.transform(df_monthly)
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from the first column of a 2-D series.

    Each sample is ``look_back`` consecutive values of column 0 and its target
    is the value that immediately follows the window.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns); only column 0
            is read.
        look_back: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays, where ``X`` has shape
        ``(n_rows - look_back, look_back)`` and ``Y`` has shape
        ``(n_rows - look_back,)``. When the series is too short to form a
        single window, ``X`` has shape ``(0, look_back)`` and ``Y`` has shape
        ``(0,)``.
    """
    values = np.asarray(dataset)
    n_samples = len(values) - look_back
    if n_samples <= 0:
        # Keep X two-dimensional so callers that read X.shape[1] still work.
        return np.empty((0, look_back)), np.empty((0,))
    X = np.array([values[i:i + look_back, 0] for i in range(n_samples)])
    Y = np.array([values[i + look_back, 0] for i in range(n_samples)])
    return X, Y
# Number of past months fed to the network for each prediction.
look_back = 3
train_size = split_point

# The test slice starts look_back rows before the split so that its first
# window is made of the last training points.
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size - look_back:]
X_train, y_train = create_dataset(train_data, look_back)
X_test, y_test = create_dataset(test_data, look_back)

# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Two stacked 50-unit LSTM layers followed by a single-output dense layer.
model_lstm = Sequential([
    LSTM(50, return_sequences=True, input_shape=(look_back, 1)),
    LSTM(50),
    Dense(1),
])
model_lstm.compile(optimizer='adam', loss='mean_squared_error')
model_lstm.fit(X_train, y_train, epochs=50, batch_size=1, verbose=2)
Epoch 1/50 126/126 - 3s - 27ms/step - loss: 0.0595 Epoch 2/50 126/126 - 0s - 4ms/step - loss: 0.0223 Epoch 3/50 126/126 - 1s - 4ms/step - loss: 0.0267 Epoch 4/50 126/126 - 0s - 3ms/step - loss: 0.0214 Epoch 5/50 126/126 - 0s - 3ms/step - loss: 0.0227 Epoch 6/50 126/126 - 0s - 3ms/step - loss: 0.0233 Epoch 7/50 126/126 - 1s - 5ms/step - loss: 0.0209 Epoch 8/50 126/126 - 0s - 3ms/step - loss: 0.0221 Epoch 9/50 126/126 - 1s - 5ms/step - loss: 0.0194 Epoch 10/50 126/126 - 1s - 5ms/step - loss: 0.0217 Epoch 11/50 126/126 - 1s - 5ms/step - loss: 0.0204 Epoch 12/50 126/126 - 1s - 5ms/step - loss: 0.0229 Epoch 13/50 126/126 - 1s - 5ms/step - loss: 0.0193 Epoch 14/50 126/126 - 0s - 3ms/step - loss: 0.0207 Epoch 15/50 126/126 - 0s - 3ms/step - loss: 0.0199 Epoch 16/50 126/126 - 0s - 3ms/step - loss: 0.0218 Epoch 17/50 126/126 - 1s - 5ms/step - loss: 0.0212 Epoch 18/50 126/126 - 1s - 4ms/step - loss: 0.0203 Epoch 19/50 126/126 - 1s - 5ms/step - loss: 0.0200 Epoch 20/50 126/126 - 0s - 3ms/step - loss: 0.0219 Epoch 21/50 126/126 - 0s - 3ms/step - loss: 0.0187 Epoch 22/50 126/126 - 0s - 3ms/step - loss: 0.0201 Epoch 23/50 126/126 - 0s - 3ms/step - loss: 0.0205 Epoch 24/50 126/126 - 1s - 6ms/step - loss: 0.0202 Epoch 25/50 126/126 - 1s - 5ms/step - loss: 0.0186 Epoch 26/50 126/126 - 1s - 5ms/step - loss: 0.0186 Epoch 27/50 126/126 - 1s - 4ms/step - loss: 0.0202 Epoch 28/50 126/126 - 0s - 4ms/step - loss: 0.0200 Epoch 29/50 126/126 - 0s - 4ms/step - loss: 0.0201 Epoch 30/50 126/126 - 1s - 4ms/step - loss: 0.0187 Epoch 31/50 126/126 - 1s - 4ms/step - loss: 0.0186 Epoch 32/50 126/126 - 0s - 3ms/step - loss: 0.0183 Epoch 33/50 126/126 - 1s - 5ms/step - loss: 0.0178 Epoch 34/50 126/126 - 1s - 5ms/step - loss: 0.0186 Epoch 35/50 126/126 - 0s - 3ms/step - loss: 0.0182 Epoch 36/50 126/126 - 1s - 5ms/step - loss: 0.0186 Epoch 37/50 126/126 - 0s - 3ms/step - loss: 0.0193 Epoch 38/50 126/126 - 0s - 3ms/step - loss: 0.0183 Epoch 39/50 126/126 - 0s - 2ms/step - loss: 0.0196 Epoch 40/50 
126/126 - 1s - 5ms/step - loss: 0.0187 Epoch 41/50 126/126 - 0s - 2ms/step - loss: 0.0176 Epoch 42/50 126/126 - 0s - 2ms/step - loss: 0.0194 Epoch 43/50 126/126 - 1s - 5ms/step - loss: 0.0173 Epoch 44/50 126/126 - 1s - 5ms/step - loss: 0.0182 Epoch 45/50 126/126 - 0s - 3ms/step - loss: 0.0189 Epoch 46/50 126/126 - 0s - 3ms/step - loss: 0.0183 Epoch 47/50 126/126 - 0s - 2ms/step - loss: 0.0175 Epoch 48/50 126/126 - 1s - 5ms/step - loss: 0.0168 Epoch 49/50 126/126 - 0s - 3ms/step - loss: 0.0191 Epoch 50/50 126/126 - 0s - 3ms/step - loss: 0.0178
<keras.src.callbacks.history.History at 0x7ed612549180>
# Run the trained network over the training windows, then the test windows.
train_predict, test_predict = (
    model_lstm.predict(windows) for windows in (X_train, X_test)
)
4/4 ━━━━━━━━━━━━━━━━━━━━ 1s 90ms/step 2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
# Map the scaled predictions and targets back to rating units.
# inverse_transform works on (n_samples, n_features) input and the scaler has
# one feature, so the 1-D targets are passed as a single column. Wrapping them
# in a list instead passes one row of n values, which only works through
# broadcasting. The result is transposed back to the (1, n) layout so the [0]
# indexing used for the metrics is unchanged.
train_predict = scaler.inverse_transform(train_predict)
y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1)).T
test_predict = scaler.inverse_transform(test_predict)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).T

mae_lstm = mean_absolute_error(y_test_inv[0], test_predict[:, 0])
rmse_lstm = np.sqrt(mean_squared_error(y_test_inv[0], test_predict[:, 0]))
print(f'LSTM Model MAE: {mae_lstm}')
print(f'LSTM Model RMSE: {rmse_lstm}')
LSTM Model MAE: 0.06644917835631459 LSTM Model RMSE: 0.07130137136446499
# Take as many trailing test dates as there are LSTM predictions and print
# both lengths as a sanity check.
n_lstm_predictions = len(test_predict)
test_dates = test.index[-n_lstm_predictions:]
print("Length of test_predict:", n_lstm_predictions)
print("Length of test_dates:", len(test_dates))
Length of test_predict: 33 Length of test_dates: 33
# Overlay the LSTM predictions on the train and test series.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train['overall'], label='Train')
ax.plot(test['overall'], label='Test')
ax.plot(test_dates, test_predict, label='LSTM Prediction')
ax.set_title('LSTM Model Predictions')
ax.set_xlabel('Date')
ax.set_ylabel('Average Rating')
ax.legend()
plt.show()
# One row per model with its test-set MAE and RMSE.
model_scores = [
    ('ARIMA', mae_arima, rmse_arima),
    ('Prophet', mae_prophet, rmse_prophet),
    ('LSTM', mae_lstm, rmse_lstm),
]
performance = pd.DataFrame(model_scores, columns=['Model', 'MAE', 'RMSE'])
print("\nModel Performance Comparison:")
print(performance)
Model Performance Comparison: Model MAE RMSE 0 ARIMA 0.119113 0.140937 1 Prophet 0.138755 0.171265 2 LSTM 0.066449 0.071301